Adding a check for messages which include hidden chars (mostly written by Niklas...
authorRotem Liss <rotem@users.mediawiki.org>
Sat, 5 Aug 2006 18:13:33 +0000 (18:13 +0000)
committerRotem Liss <rotem@users.mediawiki.org>
Sat, 5 Aug 2006 18:13:33 +0000 (18:13 +0000)
maintenance/checkLanguage.php
maintenance/languages.inc

index 1681ff0..db29941 100644 (file)
@@ -75,4 +75,9 @@ $nonXHTMLMessages = $wgLanguages->getNonXHTMLMessages( $code );
 $nonXHTMLMessagesNumber = count( $nonXHTMLMessages );
 $wgLanguages->outputMessagesList( $nonXHTMLMessages, "\n$nonXHTMLMessagesNumber messages of $localMessagesNumber in $code are not well-formed XHTML:", $wgHideMessages );
 
+# Messages which include hidden chars
+$messagesWithWrongChars = $wgLanguages->getMessagesWithWrongChars( $code );
+$messagesWithWrongCharsNumber = count( $messagesWithWrongChars );
+$wgLanguages->outputMessagesList( $messagesWithWrongChars, "\n$messagesWithWrongCharsNumber messages of $localMessagesNumber in $code include hidden chars which should not be used in the messages:", $wgHideMessages );
+
 ?>
index f8bee1c..0f7ef75 100644 (file)
@@ -263,6 +263,43 @@ class languages {
                return $nonXHTMLMessages;
        }
 
+       /**
+        * Get the messages which include wrong (hidden) characters.
+        *
+        * Only messages whose key also exists in the English ('en') messages
+        * are checked. In every returned message each hidden character is
+        * replaced by a viewable placeholder (e.g. [LRM], [NBSP]), so that it
+        * can be seen in the output.
+        *
+        * @param $code The language code.
+        *
+        * @return The messages which include wrong characters in this language,
+        *         indexed by message key, with the hidden characters replaced
+        *         by their viewable placeholders.
+        */
+       public function getMessagesWithWrongChars( $code ) {
+               $this->loadMessages( 'en' );
+               $this->loadMessages( $code );
+               # Viewable placeholder => UTF-8 byte sequence of the hidden character
+               $wrongChars = array(
+                       '[LRM]' => "\xE2\x80\x8E", # U+200E left-to-right mark
+                       '[RLM]' => "\xE2\x80\x8F", # U+200F right-to-left mark
+                       '[LRE]' => "\xE2\x80\xAA", # U+202A left-to-right embedding
+                       '[RLE]' => "\xE2\x80\xAB", # U+202B right-to-left embedding
+                       '[POP]' => "\xE2\x80\xAC", # U+202C pop directional formatting
+                       '[LRO]' => "\xE2\x80\xAD", # U+202D left-to-right override
+                       '[RLO]' => "\xE2\x80\xAE", # U+202E right-to-left override
+                       '[ZWSP]'=> "\xE2\x80\x8B", # U+200B zero width space
+                       '[NBSP]'=> "\xC2\xA0",     # U+00A0 no-break space
+                       '[WJ]'  => "\xE2\x81\xA0", # U+2060 word joiner
+                       '[BOM]' => "\xEF\xBB\xBF", # U+FEFF byte order mark
+                       '[FFFD]'=> "\xEF\xBF\xBD", # U+FFFD replacement character
+               );
+               # One alternation of all the hidden characters, matched in UTF-8 mode
+               $wrongRegExp = '/(' . implode( '|', array_values( $wrongChars ) ) . ')/sDu';
+               $wrongCharsMessages = array();
+               foreach ( $this->mMessages[$code] as $key => $value ) {
+                       if ( isset( $this->mMessages['en'][$key] ) && preg_match( $wrongRegExp, $value ) ) {
+                               # Make every hidden character viewable before reporting the message
+                               foreach ( $wrongChars as $viewableChar => $hiddenChar ) {
+                                       $value = str_replace( $hiddenChar, $viewableChar, $value );
+                               }
+                               $wrongCharsMessages[$key] = $value;
+                       }
+               }
+               return $wrongCharsMessages;
+       }
+
        /**
         * Output a messages list.
         *